package com.hotutil.start;

import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.sax.BodyContentHandler;
import org.xml.sax.SAXException;


import java.io.*;

public class TikaUtil {
    /**
     * Text handed back to the caller when Tika fails to parse the document.
     * The literal reads roughly "watermark has not been removed from the file";
     * it is returned for every {@link TikaException} or {@link SAXException},
     * whatever the underlying cause.
     */
    private static final String PARSE_FAILURE_TEXT = "文件未去除水印";

    /**
     * Extracts the body text of a document using Tika's auto-detecting parser.
     *
     * <p>This method does not call {@code close()} on the supplied stream.
     *
     * @param is input stream positioned at the start of the document to parse
     * @return the text collected by the body content handler, or a fixed
     *         failure message if parsing raised a {@link TikaException} or
     *         {@link SAXException}
     * @throws IOException if the parser reports an I/O failure; this is not
     *         caught here and propagates to the caller
     */
    public static String tikaParse(InputStream is) throws IOException {
        // Per the original note, Tika's default handler stops after 100000
        // characters; a write limit of -1 lifts that cap.
        BodyContentHandler bodyHandler = new BodyContentHandler(-1);
        // Receives document metadata during parsing; it is not read afterwards.
        Metadata docMetadata = new Metadata();
        // Detects the document's MIME type when parse is invoked.
        AutoDetectParser autoParser = new AutoDetectParser();

        try {
            autoParser.parse(is, bodyHandler, docMetadata);
        } catch (TikaException | SAXException ex) {
            // Both failure kinds map to the same fixed message; the exception
            // details are intentionally not surfaced to the caller.
            return PARSE_FAILURE_TEXT;
        }
        return bodyHandler.toString();
    }

}
